# Data Visualization Project

# Load data and libraries.
# Spaces inside an R string need no shell-style escaping: "\ " is not a valid
# R escape sequence and prevents the script from parsing. The path is built
# with file.path() so each component stays readable and within line length.
data_path <- file.path(
  "~", "Desktop", "R-Course-HTML-Notes",
  "R-for-Data-Science-and-Machine-Learning", "Training Exercises",
  "Capstone and Data Viz Projects", "Data Visualization Project",
  "Economist_Assignment_Data.csv"
)
df <- read.csv(data_path)
library(ggplot2)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Drop the first column by position, then preview the first rows.
df <- df %>%
  select(-1)

head(df)
##       Country HDI.Rank   HDI CPI            Region
## 1 Afghanistan      172 0.398 1.5      Asia Pacific
## 2     Albania       70 0.739 3.1 East EU Cemt Asia
## 3     Algeria       96 0.698 2.9              MENA
## 4      Angola      148 0.486 2.0               SSA
## 5   Argentina       45 0.797 3.0          Americas
## 6     Armenia       86 0.716 2.6 East EU Cemt Asia
# Scatter plot of HDI (y) against CPI (x), with points colored by Region.

pl <- ggplot(data = df, mapping = aes(x = CPI, y = HDI, color = Region)) +
  geom_point()
pl

# Same scatter plot, drawn with larger hollow circles.
# `shape = 1` inside geom_point() selects the open-circle glyph.

pl1 <- ggplot(data = df, mapping = aes(x = CPI, y = HDI, color = Region)) +
  geom_point(shape = 1, size = 5)
pl1

# Add a trend line.
# `group = 1` overrides the per-Region grouping implied by the color
# aesthetic, so one smoother is fitted to all points.

pl2 <- pl1 +
  geom_smooth(mapping = aes(group = 1))
pl2
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# Customize the trend line: a single red linear fit of y on log(x),
# drawn without the confidence band.

pl3 <- pl1 +
  geom_smooth(
    mapping = aes(group = 1),
    method = "lm",
    formula = y ~ log(x),
    se = FALSE,
    color = "red"
  )
pl3

# Label every point with its country name.

pl4 <- pl3 +
  geom_text(mapping = aes(label = Country))
pl4

# Countries whose labels should be shown on the plot.
# Names are matched exactly against the Country column, so a misspelled
# entry is silently skipped ("Spane" corrected to "Spain").

selected_Country_Label <- c(
  "Russia", "Venezuela", "Iraq", "Myanmar", "Sudan",
  "Afghanistan", "Congo", "Greece", "Argentina", "Brazil",
  "India", "Italy", "China", "South Africa", "Spain",
  "Botswana", "Cape Verde", "Bhutan", "Rwanda", "France",
  "United States", "Germany", "Britain", "Barbados", "Norway", "Japan",
  "New Zealand", "Singapore"
)

# Label only the rows whose Country appears in selected_Country_Label.
# The text layer gets its own filtered data; check_overlap asks ggplot2 to
# skip labels that would overlap ones already drawn. TRUE is spelled out
# because T is an ordinary variable that can be reassigned.
pl5 <- pl3 +
  geom_text(
    mapping = aes(label = Country),
    data = subset(df, Country %in% selected_Country_Label),
    color = "gray20",
    check_overlap = TRUE
  )
pl5

# Switch to the black-and-white theme.

pl6 <- pl5 +
  theme_bw()
pl6

# Configure the x axis (CPI): axis title, visible range, and integer breaks.
# The y axis (HDI) is configured in the next step.

pl7 <- pl6 +
  scale_x_continuous(
    name = "Corruption Perceptions Index, 2011 (10 = Least Corrupt)",
    breaks = 1:10,
    limits = c(0.9, 10.5)
  )
pl7

# Configure the y axis (HDI): axis title and visible range.
pl8 <- pl7 +
  scale_y_continuous(
    name = "Human Development Index, 2011 (1 = Best)",
    limits = c(0.2, 1)
  )
pl8

# Give the finished plot a title.

pl9 <- pl8 +
  ggtitle(label = "Corruption and Human Development")
pl9